#!/usr/bin/python
# -*- coding: UTF-8 -*-
# The coding declaration above allows Chinese text to be written in this .py file.
import codecs
import csv
import requests
from bs4 import BeautifulSoup ## -*- coding: UTF-8 -*-

# Fetch step: download the page with requests
def getHTML(url, timeout=30):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The undecoded response body (``Response.content``).

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection problems or
            when the timeout expires.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # parser as if it were the real document.
    r.raise_for_status()
    return r.content
	
# Parse step: extract the company entries with BeautifulSoup (bs4)

def parseHTML(html):
    """Extract company entries from the company-list page markup.

    Looks inside the first ``div.middle`` -> ``div.list-ct`` container of the
    page body for every ``ul.company-list`` and collects one entry per
    ``li`` that carries a link.

    Args:
        html: Page markup, in any form ``BeautifulSoup`` accepts.

    Returns:
        A list of ``[text, href]`` pairs, both UTF-8 encoded: the full text
        of the ``li`` and the ``href`` of its first ``<a>``.

    Raises:
        ValueError: If the page has no body or the expected container divs
            are missing.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Walk down the expected containers one level at a time so that a
    # missing level is reported clearly instead of surfacing as an
    # AttributeError on None.
    container = soup.body
    for css_class in ('middle', 'list-ct'):
        if container is None:
            break
        container = container.find('div', attrs={'class': css_class})
    if container is None:
        raise ValueError(
            "company list container (div.middle > div.list-ct) not found")

    company_list = []
    for company_ul in container.find_all('ul', attrs={'class': 'company-list'}):
        for company_li in company_ul.find_all('li'):
            anchor = company_li.a
            company_url = anchor.get('href') if anchor is not None else None
            if company_url is None:
                # An item without a link has nothing to record; skip it
                # rather than aborting the whole parse.
                continue
            company_info = company_li.get_text()
            company_list.append(
                [company_info.encode('utf-8'), company_url.encode('utf-8')])
    return company_list

# Save step: write the rows to a CSV file
def writeCSV(file_name, data_list):
    """Write rows to *file_name* as a UTF-8 encoded CSV file.

    The ``with`` statement closes the file automatically, even if writing
    fails part-way through.

    Args:
        file_name: Path of the CSV file to create; an existing file is
            overwritten.
        data_list: Iterable of rows, each an iterable of fields. Fields may
            be text or UTF-8 encoded byte strings.
    """
    import sys  # local import: only needed to choose the file mode

    if sys.version_info[0] < 3:
        # Python 2: the csv module emits byte strings, so the file has to
        # be opened in binary mode.
        with open(file_name, 'wb') as f:
            csv.writer(f).writerows(data_list)
        return

    # Python 3: the csv module needs a text-mode file. newline='' leaves
    # line-ending handling to the csv writer, and the explicit encoding
    # keeps the output UTF-8 regardless of the platform default.
    with open(file_name, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        for row in data_list:
            # Decode byte-string fields so they are written as text rather
            # than as their b'...' representation.
            writer.writerow([
                cell.decode('utf-8') if isinstance(cell, bytes) else cell
                for cell in row
            ])


if __name__ == "__main__":
    # Script entry point: download the company list page, extract the
    # company entries, and store them in test.csv.
    URL = 'http://www.cninfo.com.cn/cninfo-new/information/companylist'
    data_list = parseHTML(getHTML(URL))
    writeCSV('test.csv', data_list)
	